"""
用Zeal和Dash查文档时发现有些文档中嵌入了carbonads的广告，看着不顺眼，写个脚本把这些广告删了。
"""

import os
import re

# Location of the Dash/Zeal docset's HTML documents.  A raw string is used so
# the Windows backslashes are never interpreted as escape sequences.
root = r"C:\Program Files\Zeal\docsets\Bootstrap_3.docset\Contents\Resources\Documents"

# Matches the carbonads loader <script> element.  ``[^>]*?`` keeps the
# attribute scan inside a single opening tag; a plain ``.*?`` could start at an
# earlier ``<script `` on the same line and swallow everything up to the ad.
substitute_pattern = r'<script [^>]*? id="_carbonads_js">.*?</script>'

# Compiled once because the same pattern is applied to every file.
_AD_SCRIPT_RE = re.compile(substitute_pattern)


def remove_carbon_ads(html_text: str) -> str:
    """Return *html_text* with every carbonads loader ``<script>`` removed."""
    return _AD_SCRIPT_RE.sub("", html_text)


def clean_docset(docs_root: str) -> int:
    """Strip the carbonads script from the HTML files under *docs_root*.

    The directory tree is walked recursively.  Only files whose name ends
    with ``html`` are read; each one is rewritten in place, and only when
    removing the ad actually changed its content.

    Args:
        docs_root: Directory containing the docset's documents.

    Returns:
        The number of files that were modified.
    """
    cleaned_count = 0
    for dirpath, _dirnames, filenames in os.walk(docs_root):
        for filename in filenames:
            if not filename.endswith("html"):    # only HTML files are edited
                continue
            target = os.path.join(dirpath, filename)

            # newline="" disables newline translation so the file's original
            # line endings survive the read/write round trip.
            with open(target, "r+", encoding="utf-8", newline="") as page:
                original = page.read()
                cleaned = remove_carbon_ads(original)
                if cleaned == original:
                    continue    # nothing to remove; leave the file untouched
                page.seek(0)
                page.write(cleaned)
                # The cleaned text is shorter than the original, so cut off
                # the leftover tail of the old content.
                page.truncate()
                cleaned_count += 1
    return cleaned_count


# Running the script cleans the configured docset.
clean_docset(root)
